import jieba
import re
from collections import Counter

# Characters that mark a token as punctuation rather than a word.
PUNCTUATION_RE = re.compile(r'[，。！？、【】、；：“”‘’《》〈〉（）〔〕]')


def filter_words(words):
    """Return the tokens that contain neither punctuation nor a newline.

    Args:
        words: Iterable of string tokens, e.g. the list returned by
            ``jieba.lcut``.

    Returns:
        A list with the tokens in their original order, minus every token
        that matches ``PUNCTUATION_RE`` or contains a newline character.
    """
    # The two conditions must be tested against the token separately.
    # Combining the pattern and the newline string with ``and`` before
    # passing them to ``re.search`` yields ``False`` as the pattern and
    # raises TypeError.
    return [
        word for word in words
        if not PUNCTUATION_RE.search(word) and '\n' not in word
    ]


def main():
    """Segment ``cipin.txt`` with jieba and print the filtered tokens."""
    # Read the text to segment.
    with open('cipin.txt', encoding='utf-8') as f:
        text = f.read()

    # Load the custom dictionary.
    jieba.load_userdict('data.txt')

    # Segment with jieba in full mode (cut_all=True).
    words = jieba.lcut(text, cut_all=True)
    filtered_words = filter_words(words)
    print(filtered_words)


if __name__ == '__main__':
    main()

# Disabled word-frequency report: the code below is wrapped in a bare string
# literal, so it is evaluated as a no-op expression and never executes.
""" word_counts = Counter(words)

# 输出词频
for word, count in word_counts.items():
    print(f"{word}: {count}") """
